** indiv id: cd_a ent con v_sel n_hog h_mud n_ren est l_nac_c nac_dia nac_mes nac_anio sex

** 2000-2004: use housing characteristics, which are available in the 2nd quarter, to set up the unique id!
/* variables pre-codified by INEGI with a uniform construction of variables
clase1 
clase2
o_iv_ins
o_seg_soc
*/

* ---- Session setup: empty memory, no paging of output, close any open log ----
clear all
set more off
capture log close

* Root folders. Each path ends with a backslash so that file names can be
* appended to the macro directly.
local origdata "D:\Mexico\Data_source\ENOE\Data\"
local savedata "C:\Users\rgi021\"
local rollout "C:\Users\rgi021\"

**#------------------------2000-2004----------------------------------------------------------------
**********************
** socio demographic
**********************
* Year 2000: quarters 2 to 4 of the person file are cleaned one by one and
* saved as ene<Q>00_d.dta in the save folder.
forval Q = 2/4 {
foreach Y in 00 {
	use "`origdata'20`Y'_Q`Q'/ene`Q'`Y'.dta", clear
	qui renvars *, lower
	destring, replace
	* c_res is converted to string here as well, as in the 2001-2004 loop, so
	* that the yearly files share one storage type when they are appended.
	tostring v_sel ageb c_res, replace
	* remove the leading zero of one-digit v_sel codes
	forval K = 1/9 {
		replace v_sel = "`K'" if v_sel == "0`K'"
	}

	** clean up age and define presence of old and young people in hh
	rename r_trh n_ren

	ge parentes = par
	ge head = parentes == 1
	ge spouse = parentes == 2

	* Firm size.
	* p3b: own-account workers and employers.
	* p3e: piece-rate, commission or percentage workers, fixed-wage workers,
	*      cooperative members, unpaid family and non-family workers.
	* Codes 9 and 99 (does not know) become missing.
	ge firm_size1 = p3b
	replace firm_size1 = . if p3b == 9 | p3b == 99

	ge firm_size2 = p3e
	replace firm_size2 = . if p3e == 9 | p3e == 99

	* p3e is used only where p3b is 0 and p3e holds a valid class (1-8)
	ge firm_size = firm_size1
	replace firm_size = firm_size2 if firm_size1 == 0 & firm_size2 >= 1 & firm_size2 <= 8
	replace firm_size = . if firm_size == 0

	lab var firm_size "Firm Size"
	* last label text repaired: it was stored as double-encoded characters
	lab define firm_size 1 "1 persona" 2  "2 a 5 personas" 3 "6 a 10 personas" 4  "11 a 15 personas" 5 "16 a 50 personas" 6 "51 a 100 personas" 7 "101 a 250 personas" 8 "251 y más personas"
	lab value firm_size firm_size

	* Age (98 and 99 set to missing) and household composition flags. Each flag
	* is the household maximum of a person-level indicator.
	ge age = eda
	replace age = . if age == 98 | age == 99

	ge aux = age <= 5 & parentes == 3
	bys a_met ent con v_sel hog h_mud est ageb sec: egen dchild0_5 = max(aux)
	lab var dchild0_5 "Children 0-5 in fam"
	drop aux

	ge aux = age <= 14 & parentes == 3
	bys a_met ent con v_sel hog h_mud est ageb sec: egen dchild0_14 = max(aux)
	lab var dchild0_14 "Children 0-14 in fam"
	drop aux

	* age != . is needed because missing compares greater than 65
	ge aux = age >= 65 & age !=. & parentes == 4
	bys a_met ent con v_sel hog h_mud est ageb sec: egen elderly = max(aux)
	lab var elderly  "Elderly in famly"
	drop aux

	** Labor market
	* p1a1: 1 stays 1, 2 becomes 0, 0 becomes missing
	ge work =  p1a1
	recode work (2=0) (0=.)

	* Definition of informality: access to social security through job
	* 1 = any of IMSS, ISSSTE or private health insurance; 2 = none of the three
	ge socsec = 1 		if p7d_4 == 1 | p7d_5 == 1 |  p7d_8 == 1
	replace socsec = 2	if p7d_4 == 2 & p7d_5 == 2 &  p7d_8 == 2
	la define socsec 1 "Access to Medical Services" 2 "No Access"
	la val socsec socsec

	* Variables
	rename c_ocu11c occupation
	rename rama_est1 sector
	rename rama_est2 sector_detailed
	rename ambito2 ambito

	forval j = 1/9 {
		rename p7d_`j' med_assis_`j'
	}

	lab var ing_x_hrs "hourly wage"
	lab var hrsocup "weekly hours"
	lab var ingocup "Montly wage"

	* Health shock among those not working or not looking for work.
	* p1f == 5: permanently unable to work (added 17/4 by rg).
	rename p1b motive_notwork
	ge health_shock_nw = 0 if p1a1 == 2
	replace health_shock_nw = 1 if motive_notwork == 2 | motive_notwork == 4 | p1f == 5 | p2c == 6 | p2e == 9
	lab var health_shock_nw "Not working due to health motives"

	* Health shock among those working: reduced hours due to health
	rename p6b motive_notworko
	ge health_shock_w = 0 if p1a1 == 1
	replace health_shock_w = 1 if motive_notworko == 7 & p1a1 == 1
	lab var health_shock_w "Health shock if working"

	gen quarter=20`Y'`Q'
	keep work socsec clase1 clase2 clase3 *niv_ins* *seg_soc* ent est ageb sec mun eda l_nac sex n_ent quarter ///
		a_met est con v_sel hog h_mud c_res n_ren  parentes o_pos_ocu s_pos_ocu dchild0_5 dchild0_14 elderly ///
		ing_x_hrs hrsocup ingocup t_reg a_lis man e_civ ing7c  e_con emple7c t_loc firm_size occupation sector ///
		sector_detailed ambito health_shock_w health_shock_nw
	sort a_met ent con v_sel hog h_mud n_ren sex quarter
	save "`savedata'ene`Q'`Y'_d.dta", replace
	clear
}
}

* Identification of the household head, 2000 (quarter 2 only).
* Keeps one record per head with the head's age and sex and saves it as
* head<Q>00_d.dta.
local Q = 2
foreach Y in 00 {
	use "`origdata'20`Y'_Q`Q'/ene`Q'`Y'.dta", clear
	qui renvars *, lower
	destring, replace
	tostring  v_sel ageb, replace
	* remove the leading zero of one-digit v_sel codes
	forval K = 1/9 {
		replace v_sel = "`K'" if v_sel == "0`K'"
	}

	ge parentes = par
	keep if par == 1

	ge age = eda
	replace age = . if age == 98 | age == 99
	ge head_age = age

	* The mother flag in the 2005-2012 section of this file uses sex == 2 for
	* women, so a male head is sex == 1. The previous test on sex == 2 flagged
	* female heads.
	* NOTE(review): assumes the 2000-2004 files use the same sex coding - confirm
	* against the codebook.
	ge head_male = sex == 1 if sex <= 2

	gen quarter=20`Y'`Q'
	keep  a_met ent con v_sel hog h_mud head_age head_male quarter
	sort a_met ent con v_sel hog h_mud quarter
	* file name is appended directly to the folder macro, exactly as the merge
	* step builds the name when it reads this file
	save "`savedata'head`Q'`Y'_d.dta", replace
	clear
}

* Household file, 2000 (quarter 2 only): keeps the household keys and housing
* characteristics and saves them as hog<Q>00_d.dta.
local Q = 2
foreach Y in 00 {
	use "`origdata'20`Y'_Q`Q'/hog-`Q'`Y'.dta", clear
	qui renvars *, lower
	destring, replace
	tostring  v_sel ageb, replace
	* remove the leading zero of one-digit v_sel codes
	forval K = 1/9 {
		replace v_sel = "`K'" if v_sel == "0`K'"
	}

	gen quarter=20`Y'`Q'
	keep ent est ageb sec mun quarter a_met con v_sel hog h_mud t_reg t_res l_viv a_lis man t_hog men_12 may_12 per
	sort a_met ent con v_sel hog h_mud quarter
	* file name is appended directly to the folder macro, exactly as the merge
	* step builds the name when it reads this file
	save "`savedata'hog`Q'`Y'_d.dta", replace
	clear
}



* Years 2001 to 2004: every quarter of the person file is cleaned and saved as
* ene<Q><YY>_d.dta in the save folder.
forval Q = 1/4 {
foreach Y in 01 02 03 04 {
	use "`origdata'20`Y'_Q`Q'/ene`Q'`Y'.dta", clear
	qui renvars *, lower
	destring _all, replace
	tostring  v_sel ageb c_res, replace
	* remove the leading zero of one-digit v_sel codes
	forval K = 1/9 {
		replace v_sel = "`K'" if v_sel == "0`K'"
	}

	** clean up age and define presence of old and young people in hh
	rename r_trh n_ren

	ge parentes = par
	ge head = parentes == 1
	ge spouse = parentes == 2

	* Firm size.
	* p3b: own-account workers and employers.
	* p3e: piece-rate, commission or percentage workers, fixed-wage workers,
	*      cooperative members, unpaid family and non-family workers.
	* Codes 9 and 99 (does not know) become missing.
	ge firm_size1 = p3b
	replace firm_size1 = . if p3b == 9 | p3b == 99

	ge firm_size2 = p3e
	replace firm_size2 = . if p3e == 9 | p3e == 99

	* p3e is used only where p3b is 0 and p3e holds a valid class (1-8)
	ge firm_size = firm_size1
	replace firm_size = firm_size2 if firm_size1 == 0 & firm_size2 >= 1 & firm_size2 <= 8
	replace firm_size = . if firm_size == 0

	lab var firm_size "Firm Size"
	* last label text repaired: it was stored as double-encoded characters
	lab define firm_size 1 "1 persona" 2  "2 a 5 personas" 3 "6 a 10 personas" 4  "11 a 15 personas" 5 "16 a 50 personas" 6 "51 a 100 personas" 7 "101 a 250 personas" 8 "251 y más personas"
	lab value firm_size firm_size

	* Age (98 and 99 set to missing) and household composition flags. Each flag
	* is the household maximum of a person-level indicator.
	ge age = eda
	replace age = . if age == 98 | age == 99

	ge aux = age <= 5 & parentes == 3
	bys a_met ent con v_sel hog h_mud est ageb sec: egen dchild0_5 = max(aux)
	lab var dchild0_5 "Children 0-5 in fam"
	drop aux

	ge aux = age <= 14 & parentes == 3
	bys a_met ent con v_sel hog h_mud est ageb sec: egen dchild0_14 = max(aux)
	lab var dchild0_14 "Children 0-14 in fam"
	drop aux

	* age != . is needed because missing compares greater than 65
	ge aux = age >= 65 & age !=. & parentes == 4
	bys a_met ent con v_sel hog h_mud est ageb sec: egen elderly = max(aux)
	lab var elderly  "Elderly in famly"
	drop aux

	** Labor market
	* p1a1: 1 stays 1, 2 becomes 0, 0 becomes missing
	ge work =  p1a1
	recode work (2=0) (0=.)

	* Definition of informality: access to social security through job
	* 1 = any of IMSS, ISSSTE or private health insurance; 2 = none of the three
	ge socsec = 1 		if p7d_4 == 1 | p7d_5 == 1 |  p7d_8 == 1
	replace socsec = 2	if p7d_4 == 2 & p7d_5 == 2 &  p7d_8 == 2
	la define socsec 1 "Access to Medical Services" 2 "No Access"
	la val socsec socsec

	* Variables
	rename c_ocu11c occupation
	rename rama_est1 sector
	rename rama_est2 sector_detailed
	rename ambito2 ambito

	forval j = 1/9 {
		rename p7d_`j' med_assis_`j'
	}

	lab var ing_x_hrs "hourly wage"
	lab var hrsocup "weekly hours"
	lab var ingocup "Montly wage"

	* Health shock among those not working or not looking for work.
	* p1f == 5: permanently unable to work.
	rename p1b motive_notwork
	ge health_shock_nw = 0 if p1a1 == 2
	replace health_shock_nw = 1 if motive_notwork == 2 | motive_notwork == 4 | p1f == 5 | p2c == 6 | p2e == 9
	lab var health_shock_nw "Not working due to health motives"

	* Health shock among those working: reduced hours due to health
	rename p6b motive_notworko
	ge health_shock_w = 0 if p1a1 == 1
	replace health_shock_w = 1 if motive_notworko == 7 & p1a1 == 1
	lab var health_shock_w "Health shock if working"

	compress
	gen quarter=20`Y'`Q'
	keep work socsec clase1 clase2 clase3 *niv_ins* *seg_soc* ent mun est ageb sec eda l_nac sex n_ent quarter ///
		est ageb con v_sel hog h_mud c_res n_ren  parentes dchild0_5 dchild0_14 elderly ing_x_hrs hrsocup ingocup t_reg a_lis ///
		a_met man e_civ ing7c o_pos_ocu s_pos_ocu  e_con emple7c t_loc firm_size occupation sector sector_detailed ///
		ambito health_shock_w health_shock_nw
	sort a_met ent con v_sel hog h_mud n_ren sex quarter
	save "`savedata'ene`Q'`Y'_d.dta", replace

}
}

* Identification of the household head, 2001-2004 (quarter 2 of each year).
* Keeps one record per head with the head's age and sex and saves it as
* head<Q><YY>_d.dta.
local Q = 2
foreach Y in  01 02 03 04 {
	use "`origdata'20`Y'_Q`Q'/ene`Q'`Y'.dta", clear
	qui renvars *, lower
	destring, replace
	tostring  v_sel ageb, replace
	* remove the leading zero of one-digit v_sel codes
	forval K = 1/9 {
		replace v_sel = "`K'" if v_sel == "0`K'"
	}

	ge parentes = par
	keep if par == 1

	ge age = eda
	replace age = . if age == 98 | age == 99
	ge head_age = age

	* The mother flag in the 2005-2012 section of this file uses sex == 2 for
	* women, so a male head is sex == 1. The previous test on sex == 2 flagged
	* female heads.
	* NOTE(review): assumes the 2000-2004 files use the same sex coding - confirm
	* against the codebook.
	ge head_male = sex == 1 if sex <= 2

	gen quarter=20`Y'`Q'
	keep  a_met ent con v_sel hog h_mud head_age head_male quarter
	sort a_met ent con v_sel hog h_mud quarter
	save "`savedata'head`Q'`Y'_d.dta", replace
	clear
}

* Household file, 2001-2004 (quarter 2 of each year): household keys plus
* housing characteristics, saved as hog<Q><YY>_d.dta.
local Q 2
foreach Y in 01 02 03 04 {
	use "`origdata'20`Y'_Q`Q'/hog-`Q'`Y'.dta", clear
	quietly renvars *, lower
	destring, replace
	tostring v_sel ageb, replace
	* remove the leading zero of one-digit v_sel codes
	forvalues digit = 1/9 {
		replace v_sel = "`digit'" if v_sel == "0`digit'"
	}
	generate quarter = 20`Y'`Q'
	keep a_met ent con v_sel hog h_mud quarter est ageb sec mun ///
		t_reg t_res l_viv a_lis man men_12 may_12 t_hog per
	sort a_met ent con v_sel hog h_mud quarter
	save "`savedata'hog`Q'`Y'_d.dta", replace
	clear
}


***************************
* merge
***************************
* For each year the quarterly person files are stacked and the quarter-2
* household and head files are attached on the household key. Only matched
* records (_merge == 3) survive each step. The old-style merge syntax is kept
* on purpose, so the matching rules are unchanged.
local hh_key a_met ent con v_sel hog h_mud

* --- 2000: quarters 2 to 4 ---
use "`savedata'ene200_d.dta", clear
foreach q in 3 4 {
	append using "`savedata'ene`q'00_d.dta"
}

sort `hh_key'
merge `hh_key' using "`savedata'hog200_d.dta"
tabulate _merge
keep if _merge == 3
drop _merge

sort `hh_key'
merge `hh_key' using "`savedata'head200_d.dta"
tabulate _merge
keep if _merge == 3
drop _merge

save "`savedata'newene00_d.dta", replace

* --- 2001 to 2004: quarters 1 to 4 ---
foreach Y in 01 02 03 04 {
	use "`savedata'ene1`Y'_d.dta", clear
	forvalues q = 2/4 {
		append using "`savedata'ene`q'`Y'_d.dta"
	}

	sort `hh_key'
	merge `hh_key' using "`savedata'hog2`Y'_d.dta"
	tabulate _merge
	keep if _merge == 3
	drop _merge

	sort `hh_key'
	merge `hh_key' using "`savedata'head2`Y'_d.dta"
	tabulate _merge
	keep if _merge == 3
	drop _merge

	save "`savedata'newene`Y'_d.dta", replace
}

* Stack the yearly 2000-2004 files and drop records that lack any field needed
* for the person identifier or the head characteristics.
use "`savedata'newene00_d.dta", clear
forval k = 1/4 {
	append using "`savedata'newene0`k'_d.dta"
}
drop if n_ren == . | sex == . |  l_nac ==. | l_nac == 99 | head_age == . | head_male == .

* quarter is stored as YYYYQ: split it into year and quarter number
tostring quarter, replace
ge year = substr(quarter,1,4)
ge qt = substr(quarter,-1,1)
destring quarter year qt, replace

* insurance: taken from o_seg_soc when it is 1 or 2, otherwise from a_seg_soc
* when that is 1 or 2; code 2 is then recoded to 0
ge insurance = o_seg_soc if o_seg_soc > 0 & o_seg_soc < 3
replace insurance = a_seg_soc if a_seg_soc > 0 & a_seg_soc < 3 & insurance == .
recode insurance (2=0)

ge edu = niv_inst_t
drop *niv_in*

* Zero-padded string versions of the geographic codes (mun: 3 digits,
* con: 4 digits, ent: 2 digits). Padding gives every code a fixed width so that
* concatenated ids cannot be read in two different ways. Missing codes give an
* empty string.
ge mun2 = cond(missing(mun), "", string(mun, "%03.0f"))
ge con2 = cond(missing(con), "", string(con, "%04.0f"))
ge ent2 = cond(missing(ent), "", string(ent, "%02.0f"))

* Person id. The padded ent2 is used here: with the unpadded ent, a one-digit
* and a two-digit state code followed by different con2 and v_sel digits could
* produce the same string.
egen id=concat(a_met mun2 ent2 con2 v_sel hog h_mud n_ren sex)

 
* ROBUST IDENTIFIER
* n_ent = interview number
* n_enc = time (sequential index of the survey quarter)

sort quarter
egen n_enc = group(quarter)

keep if n_ent < 6
egen n_grup=concat(n_ent n_enc)
destring  n_grup, replace

* Rotation group. The hand-written lookup table that used to be here followed
* the rule grup = n_enc - n_ent + 4, for n_ent 1 to 5, n_enc up to 40 and grup
* between 1 and 42. One entry broke the rule: the row for group 12 listed 512
* instead of 513, so interview 5 in period 12 was moved from group 11 to group
* 12 and interview 5 in period 13 received no group. Computing the rule
* directly removes that error.
* NOTE(review): records with n_ent below 1 now get a missing grup; the table
* could match them by accident through the concatenated n_grup value.
quietly {
	gen grup = n_enc - n_ent + 4 if inrange(n_ent, 1, 5) & n_enc <= 40
	replace grup = . if grup < 1 | grup > 42
}

egen id_ib =concat(id grup)

* Remove person-quarter records that share an id: every copy is dropped,
* whatever the number of copies (the previous test n == 2 left groups of three
* or more in the data).
tab quarter
bys id_ib  quarter: ge n = _N
tab n
drop if n > 1
drop n
tab quarter

* now check the ids: drop ids that appear more than 5 times
bys id_ib: ge n = _N
tab n
drop if n > 5
drop n

keep if hog == 1
* NOTE(review): idh uses the unpadded ent and con although the padded ent2 and
* con2 exist at this point; confirm whether the padded versions were intended.
egen idh=concat(a_met ent mun2 con v_sel hog h_mud)
egen idh2=concat(idh grup)

* diagnostic only: position of each person within household and quarter when
* sorted by relationship code, tabulated for heads
bys idh2 quarter (parentes): ge n = _n
replace n = . if parentes > 1
tab n
drop n

* drop ids whose identifying fields are not constant across records
foreach var in a_met mun ent con hog h_mud n_ren sex {
	bys id_ib: egen min = min(`var')
	bys id_ib: egen max = max(`var')
	drop if min != max
	drop max min
}

drop man ageb id
compress
save "`savedata'eneo_clean00_04_rev", replace
* remove the intermediate yearly files
forval k = 0/4 {
	erase "`savedata'newene0`k'_d.dta"
}


**#------------------------2005-2012----------------------------------------------------------------
**********************
** socio demographic
**********************
* Every quarter of 2005-2012: the sociodemographic file is cleaned and saved as
* sdemt<Q><YY>_d.dta.
forval Q =1/4 {
	foreach Y in 05 06 07 08 09 10 11 12 {
		use "`origdata'20`Y'_Q`Q'/sdemt`Q'`Y'.dta", clear
		qui renvars *, lower
		destring, replace

		* Drop every record whose key is not unique. duplicates tag stores the
		* number of other records with the same key, so dup > 0 also removes
		* groups of three or more (dup == 1 removed pairs only).
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup

		***************************
		*Construction of variables*
		***************************
		/*
		Each dwelling holds one or more households, each household one or more
		residents, and each resident aged 12 or more has an occupation and
		employment questionnaire.

		To work with the sociodemographic characteristics of the whole population
		in SDEMT<per>.DBF, records with R_DEF different from 00 must be removed
		(incomplete or unsuccessful interviews), as well as records with C_RES
		equal to 2 (people who are no longer residents of the dwelling and are
		therefore not part of the household).

		To work only with the population aged 12 or more, it is also recommended
		to remove the records with EDA codes 00 to 11 and 99, since the COE1 and
		COE2 tables include only residents aged 12 or more with a complete
		interview; with the corresponding key the tables can be joined into a
		single table.

		Although the occupation and employment questionnaire is applied to
		everyone aged 12 or more, published results are limited to persons aged
		14 or more, in line with the Federal Labour Law, which sets 14 as the
		minimum legal working age.
		*/

		drop if r_def!=00
		drop if c_res==2
		drop cs_p14_c

		** assign same coding as in ene
		* Code 610 used to be listed under both 4 and 5; the later rule won, so
		* it ends up as 5 here as before.
		* NOTE(review): confirm against the relationship catalogue that 611
		* belongs with the ascendants (4).
		ge parentes = 1 if inlist(par_c, 101, 102)                                   // head
		replace parentes = 2 if par_c == 201                                         // spouse
		replace parentes = 3 if inlist(par_c, 301, 302, 303, 304, 305)               // children
		replace parentes = 4 if inlist(par_c, 601, 602, 606, 607, 608, 611, 615, 616) // parents, step-parents, grandparents ...
		replace parentes = 5 if inlist(par_c, 609, 610, 617)                         // grandchildren, great-grandchildren ...
		replace parentes = 6 if inlist(par_c, 603, 604, 605)                         // collateral relatives
		replace parentes = 7 if inlist(par_c, 612, 613, 614, 618)                    // other relatives - TODO confirm (the original comment repeated the label of code 4)
		replace parentes = 8 if par_c >= 501 & par_c <= 503                          // non-relatives
		replace parentes = 9 if par_c >= 401 & par_c <= 461                          // domestic workers
		drop par_c

		ge head = parentes == 1
		ge spouse = parentes == 2

		* Age (98 and 99 set to missing) and household composition flags. Each
		* flag is the household maximum of a person-level indicator.
		ge age = eda
		replace age = . if age == 98 | age == 99

		ge aux = age <= 5 & parentes == 3
		bys  cd_a ent con v_sel n_hog h_mud: egen dchild0_5 = max(aux)
		lab var dchild0_5 "Children 0-5 in fam"
		drop aux

		ge aux = age <= 14 & parentes == 3
		bys  cd_a ent con v_sel n_hog h_mud: egen dchild0_14 = max(aux)
		lab var dchild0_14 "Children 0-14 in fam"
		drop aux

		* age != . is needed because missing compares greater than 65
		ge aux = age >= 65 & age !=. & parentes == 4
		bys  cd_a ent con v_sel n_hog h_mud: egen elderly = max(aux)
		lab var elderly  "Elderly in famly"
		drop aux

		*Mothers and children
		* n_hij_b: number of children with 0 and 99 set to missing
		gen n_hij_b=n_hij
		replace n_hij_b=. if n_hij_b==0 | n_hij_b==99
		di "Number of Chidren"

		* mother = 1 for women with a known, positive number of children. The
		* test uses n_hij_b because a missing n_hij compares greater than 1 and
		* would flag every woman without an answer; code 99 is also left out.
		gen mother=1 if n_hij_b >= 1 & n_hij_b < . & sex==2
		di "Number of Mothers"

		*wages
		* NOTE(review): despite its name, lwage holds the hourly wage in levels
		* (values of 1 or more), not a logarithm.
		gen lwage=ing_x_hrs if ing_x_hrs >= 1 & ing_x_hrs !=.

		* Variables (task 3)
		rename c_ocu11c occupation
		rename rama_est1 sector
		rename rama_est2 sector_detailed
		rename ambito2 ambito

		drop eda*c
		gen quarter=20`Y'`Q'
		drop cs_* zona
		save "`savedata'sdemt`Q'`Y'_d.dta", replace
		clear
	}
}



****************************
*employment status - joining the coe1t and coe2t files
****************************
* The code is split in two groups because the questionnaires differ and some
* variable names change between them
****************************
* GROUP 1 - EXTENDED QUESTIONNAIRE (ampliado)
* variable p5f
* Periods:
		* 2005 - all quarters
		* 2006 to 2008: Q2
		* 2006, 2009 to 2014: Q1
****************************
foreach Y in 05  {
	forval Q =1/4 {
		* second part of the questionnaire, stored in a temporary file
		use "`origdata'20`Y'_Q`Q'/coe2t`Q'`Y'.dta", clear
		qui renvars *, lower
		* drop every record whose key is not unique (dup counts the other
		* records with the same key, so dup > 0 covers groups of any size)
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		tempfile coe2t
		save `coe2t'

		* first part of the questionnaire, joined one-to-one with the second
		use "`origdata'20`Y'_Q`Q'/coe1t`Q'`Y'.dta", clear
		qui renvars *, lower
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		merge 1:1 cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda using `coe2t'
		destring, replace

		ge work = p1
		recode work (2=0)

		** last time looked for job (available in basico & ampliado)
		* time since lost job is not available for quarter 1 in all years
		ge year_search = p2b_anio
		replace year_search = . if year_search == 9999
		ge mon_search = p2b_mes
		replace mon_search = . if  mon_search == 99

		ge date_search = ym(year_search,mon_search)
		format date_search %tm

		* not working or looking for work due to health
		rename p1c motive_notwork
		ge health_shock_nw = 0 if work == 0 & p1e!=1
		replace health_shock_nw = 1 if (motive_notwork == 6 | p2e == 5 | (p2g1 == 1 & p2g2 == 7) | (p2g1 == 1 & p2g2 == 9)) & p1e!=1
		lab var health_shock_nw "Not working due to health motives"

		* working reduced hours due to health
		rename p5f motive_notworko
		ge health_shock_w = 0 if work == 1 | p1e==1
		replace health_shock_w = 1 if (motive_notworko == 9  |  motive_notworko == 10) & (work == 1 | p1e==1)
		lab var health_shock_w "Health shock if working"

		****************************
		*informality
		****************************
		* Definition of informality: access to social security through job
		ge socsec = 1 		if p6d >= 1 & p6d <= 5 /*formal*/
		replace socsec = 2	if p6d == 6 /*no access*/
		replace socsec = 3 	if p6d == 9 /*dont know*/
		la define socsec 1 "Access to Medical Services" 2 "No Access" 3	"Dont Know"
		la val socsec socsec

		rename p6d med_assis

		keep work p1 date_search year_search mon_search  cd_a ent con v_sel n_hog h_mud n_ren p3r_anio p3r_mes health_shock_nw health_shock_w socsec med_assis
		gen quarter=20`Y'`Q'
		sort  cd_a ent con v_sel n_hog h_mud n_ren  quarter
		save "`savedata'coe1t`Q'`Y'_d.dta", replace
	}
}


* Extended questionnaire, quarter 1 of 2006 and 2009-2012.
foreach Y in 06 09 10 11 12 {
	forval Q = 1/1 {
		* second part of the questionnaire, stored in a temporary file
		use "`origdata'20`Y'_Q`Q'/coe2t`Q'`Y'.dta", clear
		qui renvars *, lower
		* drop every record whose key is not unique (dup counts the other
		* records with the same key, so dup > 0 covers groups of any size)
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		tempfile coe2t
		save `coe2t'

		* first part of the questionnaire, joined one-to-one with the second
		use "`origdata'20`Y'_Q`Q'/coe1t`Q'`Y'.dta", clear
		qui renvars *, lower
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		merge 1:1 cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda using `coe2t'
		destring, replace

		ge work = p1
		recode work (2=0)

		* Last job-search date. The keep list below needs these three variables,
		* which this loop never created, so keep stopped with an error. They are
		* built from p2b_anio and p2b_mes when both exist and left missing
		* otherwise.
		capture confirm variable p2b_anio p2b_mes
		if _rc == 0 {
			ge year_search = p2b_anio
			replace year_search = . if year_search == 9999
			ge mon_search = p2b_mes
			replace mon_search = . if mon_search == 99
		}
		else {
			ge year_search = .
			ge mon_search = .
		}
		ge date_search = ym(year_search, mon_search)
		format date_search %tm

		* not working or looking for work due to health
		rename p1c motive_notwork
		ge health_shock_nw = 0 if work == 0 & p1e!=1
		replace health_shock_nw = 1 if (motive_notwork == 6 | p2e == 5 | (p2g1 == 1 & p2g2 == 7) | (p2g1 == 1 & p2g2 == 9)) & p1e!=1
		lab var health_shock_nw "Not working due to health motives"

		* working reduced hours due to health
		rename p5f motive_notworko
		ge health_shock_w = 0 if work == 1 | p1e==1
		replace health_shock_w = 1 if (motive_notworko == 9  |  motive_notworko == 10) & (work == 1 | p1e==1)
		lab var health_shock_w "Health shock if working"

		****************************
		*informality
		****************************
		* Definition of informality: access to social security through job
		ge socsec = 1 		if p6d >= 1 & p6d <= 5 /*formal*/
		replace socsec = 2	if p6d == 6 /*no access*/
		replace socsec = 3 	if p6d == 9 /*dont know*/
		la define socsec 1 "Access to Medical Services" 2 "No Access" 3	"Dont Know"
		la val socsec socsec

		rename p6d med_assis

		keep work p1 date_search year_search mon_search  cd_a ent con v_sel n_hog h_mud n_ren p3r_anio p3r_mes health_shock_nw health_shock_w socsec med_assis
		gen quarter=20`Y'`Q'
		sort  cd_a ent con v_sel n_hog h_mud n_ren  quarter
		save "`savedata'coe1t`Q'`Y'_d.dta", replace
	}
}

* Extended questionnaire, quarter 2 of 2006-2008.
foreach Y in 06 07 08  {
	forval Q =2/2 {
		* second part of the questionnaire, stored in a temporary file
		use "`origdata'20`Y'_Q`Q'/coe2t`Q'`Y'.dta", clear
		qui renvars *, lower
		* drop every record whose key is not unique (dup counts the other
		* records with the same key, so dup > 0 covers groups of any size)
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		tempfile coe2t
		save `coe2t'

		* first part of the questionnaire, joined one-to-one with the second
		use "`origdata'20`Y'_Q`Q'/coe1t`Q'`Y'.dta", clear
		qui renvars *, lower
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		merge 1:1 cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda using `coe2t'
		destring, replace

		ge work = p1
		recode work (2=0)

		* Last job-search date. The keep list below needs these three variables,
		* which this loop never created, so keep stopped with an error. They are
		* built from p2b_anio and p2b_mes when both exist and left missing
		* otherwise.
		capture confirm variable p2b_anio p2b_mes
		if _rc == 0 {
			ge year_search = p2b_anio
			replace year_search = . if year_search == 9999
			ge mon_search = p2b_mes
			replace mon_search = . if mon_search == 99
		}
		else {
			ge year_search = .
			ge mon_search = .
		}
		ge date_search = ym(year_search, mon_search)
		format date_search %tm

		* not working or looking for work due to health
		rename p1c motive_notwork
		ge health_shock_nw = 0 if work == 0 & p1e!=1
		replace health_shock_nw = 1 if (motive_notwork == 6 | p2e == 5 | (p2g1 == 1 & p2g2 == 7) | (p2g1 == 1 & p2g2 == 9)) & p1e!=1
		lab var health_shock_nw "Not working due to health motives"

		* working reduced hours due to health
		rename p5f motive_notworko
		ge health_shock_w = 0 if work == 1 | p1e==1
		replace health_shock_w = 1 if (motive_notworko == 9  |  motive_notworko == 10) & (work == 1 | p1e==1)
		lab var health_shock_w "Health shock if working"

		****************************
		*informality
		****************************
		* Definition of informality: access to social security through job
		ge socsec = 1 		if p6d >= 1 & p6d <= 5 /*formal*/
		replace socsec = 2	if p6d == 6 /*no access*/
		replace socsec = 3 	if p6d == 9 /*dont know*/
		la define socsec 1 "Access to Medical Services" 2 "No Access" 3	"Dont Know"
		la val socsec socsec

		rename p6d med_assis

		keep work p1 date_search year_search mon_search  cd_a ent con v_sel n_hog h_mud n_ren p3r_anio p3r_mes health_shock_nw health_shock_w socsec med_assis
		gen quarter=20`Y'`Q'
		sort  cd_a ent con v_sel n_hog h_mud n_ren  quarter
		save "`savedata'coe1t`Q'`Y'_d.dta", replace
	}
}



****************************
* GROUP 2 - BASIC QUESTIONNAIRE (basico)
****************************
* variable p5e
* does not have the variables p3r_anio p3r_mes
* Periods:
		* 2006 to 2008 : Q3 and Q4
		* 2007 and 2008 : Q1
		* 2009 to 2012 : Q2, Q3 and Q4
****************************
foreach Y in 07 08 {
	forval Q = 1/1 {
		* second part of the questionnaire, stored in a temporary file
		use "`origdata'20`Y'_Q`Q'/coe2t`Q'`Y'.dta", clear
		qui renvars *, lower
		* drop every record whose key is not unique (dup counts the other
		* records with the same key, so dup > 0 covers groups of any size)
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		tempfile coe2t
		save `coe2t'

		* first part of the questionnaire, joined one-to-one with the second
		use "`origdata'20`Y'_Q`Q'/coe1t`Q'`Y'.dta", clear
		qui renvars *, lower
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		merge 1:1 cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda using `coe2t'
		destring, replace

		ge work = p1
		recode work (2=0)

		* Last job-search date. The keep list below needs these three variables,
		* which this loop never created, so keep stopped with an error. They are
		* built from p2b_anio and p2b_mes when both exist and left missing
		* otherwise.
		capture confirm variable p2b_anio p2b_mes
		if _rc == 0 {
			ge year_search = p2b_anio
			replace year_search = . if year_search == 9999
			ge mon_search = p2b_mes
			replace mon_search = . if mon_search == 99
		}
		else {
			ge year_search = .
			ge mon_search = .
		}
		ge date_search = ym(year_search, mon_search)
		format date_search %tm

		* Not working or looking for work due to health. The health conditions
		* are grouped in parentheses so that p1e != 1 applies to all of them, as
		* in the extended-questionnaire loops; without the parentheses it only
		* applied to the last one because & binds tighter than |.
		rename p1c motive_notwork
		ge health_shock_nw = 0 if work == 0 & p1e!=1
		replace health_shock_nw = 1 if (motive_notwork == 6 | p2e == 5 | (p2g1 == 1 & p2g2 == 7) | (p2g1 == 1 & p2g2 == 9)) & p1e!=1
		lab var health_shock_nw "Not working due to health motives"

		* working reduced hours due to health
		* (code 11, other personal or family reasons, is not counted)
		rename p5e motive_notworko
		ge health_shock_w = 0 if work == 1 | p1e==1
		replace health_shock_w = 1 if  (motive_notworko == 9 |  motive_notworko == 10) & (work == 1 | p1e==1)
		lab var health_shock_w "Health shock if working"

		****************************
		*informality
		****************************
		* Definition of informality: access to social security through job
		ge socsec = 1 		if p6d >= 1 & p6d <= 5 /*formal*/
		replace socsec = 2	if p6d == 6 /*no access*/
		replace socsec = 3 	if p6d == 9 /*dont know*/
		la define socsec 1 "Access to Medical Services" 2 "No Access" 3	"Dont Know"
		la val socsec socsec

		rename p6d med_assis

		keep work p1 date_search year_search mon_search  cd_a ent con v_sel n_hog h_mud n_ren health_shock_nw health_shock_w  socsec med_assis
		gen quarter=20`Y'`Q'
		sort  cd_a ent con v_sel n_hog h_mud n_ren  quarter
		save "`savedata'coe1t`Q'`Y'_d.dta", replace
	}
}



* Basic questionnaire, quarters 3 and 4 of 2006-2008.
foreach Y in 06 07 08 {
	forval Q = 3/4 {
		* second part of the questionnaire, stored in a temporary file
		use "`origdata'20`Y'_Q`Q'/coe2t`Q'`Y'.dta", clear
		qui renvars *, lower
		* drop every record whose key is not unique (dup counts the other
		* records with the same key, so dup > 0 covers groups of any size)
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		tempfile coe2t
		save `coe2t'

		* first part of the questionnaire, joined one-to-one with the second
		use "`origdata'20`Y'_Q`Q'/coe1t`Q'`Y'.dta", clear
		qui renvars *, lower
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		merge 1:1 cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda using `coe2t'
		destring, replace

		ge work = p1
		recode work (2=0)

		* Last job-search date. The keep list below needs these three variables,
		* which this loop never created, so keep stopped with an error. They are
		* built from p2b_anio and p2b_mes when both exist and left missing
		* otherwise.
		capture confirm variable p2b_anio p2b_mes
		if _rc == 0 {
			ge year_search = p2b_anio
			replace year_search = . if year_search == 9999
			ge mon_search = p2b_mes
			replace mon_search = . if mon_search == 99
		}
		else {
			ge year_search = .
			ge mon_search = .
		}
		ge date_search = ym(year_search, mon_search)
		format date_search %tm

		* Not working or looking for work due to health.
		* p2e == 5: a person with a physical or mental limitation that prevents
		* working for the rest of their life. (p2i is not in 2005, 2006.)
		* The variable is now created before it is replaced; this loop used to
		* go straight to replace and stopped with a variable-not-found error.
		* The health conditions are grouped in parentheses so that p1e != 1
		* applies to all of them, as in the extended-questionnaire loops.
		rename p1c motive_notwork
		ge health_shock_nw = 0 if work == 0 & p1e!=1
		replace health_shock_nw = 1 if (motive_notwork == 6 | p2e == 5 | (p2g1 == 1 & p2g2 == 7) | (p2g1 == 1 & p2g2 == 9)) & p1e!=1
		lab var health_shock_nw "Not working due to health motives"

		* working reduced hours due to health
		* (code 11, other personal or family reasons, is not counted)
		rename p5e motive_notworko
		ge health_shock_w = 0 if work == 1 | p1e==1
		replace health_shock_w = 1 if  (motive_notworko == 9 |  motive_notworko == 10) & (work == 1 | p1e==1)
		lab var health_shock_w "Health shock if working"

		****************************
		*informality
		****************************
		* Definition of informality: access to social security through job
		ge socsec = 1 		if p6d >= 1 & p6d <= 5 /*formal*/
		replace socsec = 2	if p6d == 6 /*no access*/
		replace socsec = 3 	if p6d == 9 /*dont know*/
		la define socsec 1 "Access to Medical Services" 2 "No Access" 3	"Dont Know"
		la val socsec socsec

		rename p6d med_assis

		keep work p1 date_search year_search mon_search  cd_a ent con v_sel n_hog h_mud n_ren health_shock_nw health_shock_w  socsec med_assis
		gen quarter=20`Y'`Q'
		sort  cd_a ent con v_sel n_hog h_mud n_ren  quarter
		save "`savedata'coe1t`Q'`Y'_d.dta", replace
	}
}


* Basic questionnaire, quarters 2 to 4 of 2009-2012.
foreach Y in 09 10 11 12 {
	forval Q = 2/4 {
		* second part of the questionnaire, stored in a temporary file
		use "`origdata'20`Y'_Q`Q'/coe2t`Q'`Y'.dta", clear
		qui renvars *, lower
		* drop every record whose key is not unique (dup counts the other
		* records with the same key, so dup > 0 covers groups of any size)
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		tempfile coe2t
		save `coe2t'

		* first part of the questionnaire, joined one-to-one with the second
		use "`origdata'20`Y'_Q`Q'/coe1t`Q'`Y'.dta", clear
		qui renvars *, lower
		duplicates tag cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda, generate(dup)
		drop if dup > 0
		drop dup
		merge 1:1 cd_a n_ent con v_sel n_hog h_mud n_ren n_pro_viv upm d_sem eda using `coe2t'
		destring, replace

		ge work = p1
		recode work (2=0)

		** last time looked for job (available in basico & ampliado)
		* time since lost job is not available for quarter 1 in all years
		ge year_search = p2b_anio
		replace year_search = . if year_search == 9999
		ge mon_search = p2b_mes
		replace mon_search = . if  mon_search == 99

		ge date_search = ym(year_search,mon_search)
		format date_search %tm

		* Not working or looking for work due to health.
		* p2e == 5: a person with a physical or mental limitation that prevents
		* working for the rest of their life. (p2i is not in 2005, 2006.)
		* The health conditions are grouped in parentheses so that p1e != 1
		* applies to all of them, as in the extended-questionnaire loops; without
		* the parentheses it only applied to the last one because & binds
		* tighter than |.
		rename p1c motive_notwork
		ge health_shock_nw = 0 if work == 0 & p1e!=1
		replace health_shock_nw = 1 if (motive_notwork == 6 | p2e == 5 | (p2g1 == 1 & p2g2 == 7) | (p2g1 == 1 & p2g2 == 9)) & p1e!=1
		lab var health_shock_nw "Not working due to health motives"

		* working reduced hours due to health
		* (code 11, other personal or family reasons, is not counted)
		rename p5e motive_notworko
		ge health_shock_w = 0 if work == 1 | p1e==1
		replace health_shock_w = 1 if  (motive_notworko == 9 |  motive_notworko == 10) & (work == 1 | p1e==1)
		lab var health_shock_w "Health shock if working"

		****************************
		*informality
		****************************
		* Definition of informality: access to social security through job
		ge socsec = 1 		if p6d >= 1 & p6d <= 5 /*formal*/
		replace socsec = 2	if p6d == 6 /*no access*/
		replace socsec = 3 	if p6d == 9 /*dont know*/
		la define socsec 1 "Access to Medical Services" 2 "No Access" 3	"Dont Know"
		la val socsec socsec

		rename p6d med_assis

		keep work p1 date_search year_search mon_search  cd_a ent con v_sel n_hog h_mud n_ren health_shock_nw health_shock_w  socsec med_assis
		gen quarter=20`Y'`Q'
		sort  cd_a ent con v_sel n_hog h_mud n_ren  quarter
		save "`savedata'coe1t`Q'`Y'_d.dta", replace
	}
}

****************************
*household (hogar)
****************************
* Every quarter of 2005-2012: keeps the household key and the survey date and
* saves them as hogt<Q><YY>_d.dta.
forval Q =1/4 {
	foreach Y in 05 06 07 08 09 10 11 12{
		use "`origdata'20`Y'_Q`Q'/hogt`Q'`Y'.dta", clear
		qui renvars *, lower

		* drop every record whose key is not unique (dup counts the other
		* records with the same key, so dup > 0 covers groups of any size;
		* dup == 1 removed pairs only)
		duplicates tag cd_a n_ent con v_sel n_hog h_mud, generate(dup)
		drop if dup > 0
		drop dup
		destring, replace

		* date of survey: p_anio has 2000 added before the date is built
		replace p_anio = p_anio + 2000
		ge date_survey = mdy(p_mes, p_dia,p_anio)
		format date_survey %td

		keep date_survey p_mes p_dia p_anio cd_a ent con v_sel n_hog h_mud
		gen quarter=20`Y'`Q'

		* this file has no n_ren variable, so the count is per household key
		* (n_ent is not part of this key)
		bys cd_a ent con v_sel n_hog h_mud : ge n =_N
		tab n

		sort cd_a ent con v_sel n_hog h_mud  quarter
		save "`savedata'hogt`Q'`Y'_d.dta", replace
	}
}


****************************
*merge
****************************
* Stage 1: combine the three cleaned files (sdemt, coe1t, hogt) into one file per year and quarter
local indkey cd_a ent con v_sel n_hog h_mud n_ren quarter
local hhkey  cd_a ent con v_sel n_hog h_mud quarter

foreach Y in 05 06 07 08 09 10 11 12 {
	forvalues Q = 1/4 {
		local qy "`Q'`Y'"

		* person-level files: sdemt (master) with coe1t (using), matched on the person key
		use "`savedata'sdemt`qy'_d.dta", clear
		sort `indkey'
		merge `indkey' using "`savedata'coe1t`qy'_d.dta"
		tabulate _merge
		drop _merge

		* add the household-level file on the household key (old-style merge: data must be sorted on the key)
		sort `hhkey'
		merge `hhkey' using "`savedata'hogt`qy'_d.dta"

		* discard records that lack any of the fields used later to identify a person
		drop if n_ren == . | nac_dia == . | nac_mes == . | nac_anio == . | sex == .

		* the merge result is tabulated after the drop above
		tabulate _merge
		drop _merge
		save "`savedata'merge`qy'_d.dta", replace
	}
}



* Stage 2: stack every year and quarter (2005Q1-2012Q4) into one data set.
* Uses forvalues instead of the obsolete "for num ...:" command; files are
* appended in the same order as before (2005Q2, 2005Q3, ..., 2012Q4).
use "`savedata'merge105_d.dta", clear
foreach Y in 05 06 07 08 09 10 11 12 {
	forvalues Q = 1/4 {
		* 2005Q1 is already in memory
		if "`Q'`Y'" != "105" {
			append using "`savedata'merge`Q'`Y'_d.dta"
		}
	}
}

* split the quarter code into year (first four characters) and quarter number (last character)
tostring quarter, replace
ge year = substr(quarter,1,4)
ge qt = substr(quarter,-1,1)
destring quarter year qt, replace

* variables that together identify one person
local person cd_a ent con v_sel n_hog h_mud n_ren est l_nac_c nac_dia nac_mes nac_anio sex

* att = number of records found for each person key
bysort `person': generate att = _N
tabulate att

** make sure there are no repeated individuals within the same interview number (n_ent)
drop n
bysort `person' n_ent: generate n = _N
tabulate n, missing
drop if n > 1

* after the drop every person key / n_ent combination should appear once (diagnostic only)
bysort `person' n_ent (qt year): generate n2 = _n
tabulate n2
drop n2

** recount the records per person key now that the repeated ones are gone
drop att
bysort `person': generate att = _N
tabulate att, missing

** identify ampliados: 1 for the listed year/quarter combinations, 0 otherwise
generate ampliado = year == 2005 ///
	| (year == 2006 & inrange(qt, 1, 2)) ///
	| (inlist(year, 2007, 2008) & qt == 2) ///
	| (inlist(year, 2009, 2010, 2011, 2012) & qt == 1)

* insurance: taken from o_seg_soc (then a_seg_soc where still empty) for 2005-2009
* and from seg_soc for 2010-2012; only source values strictly between 0 and 3 are used
generate insurance = .
replace insurance = o_seg_soc if inrange(year, 2005, 2009) & o_seg_soc > 0 & o_seg_soc < 3
replace insurance = a_seg_soc if inrange(year, 2005, 2009) & a_seg_soc > 0 & a_seg_soc < 3 & insurance == .
replace insurance = seg_soc   if inrange(year, 2010, 2012) & seg_soc > 0 & seg_soc < 3

* turn code 2 into 0 so that the variable is a 0/1 indicator
recode insurance (2 = 0)

* remove the variables that are not used downstream, shrink storage types
* and store the cleaned 2005-2012 data
#delimit ;
drop tue1 tue2 tue3 t_subrem t_subrem1
	d_ant_lab upm medica5c remune2c p14apoyos sub_rem2c pnea_est anios_esc
	emp_ppal tue_ppal trans_ppal mh_fil2 mh_col ma48me1sm n busqueda ambito1 buscar5c
	local9c dur9c c_inac5c sub_busque sub_o_est s_clasifi p1 domestico
	sub_o_est2 as_no_agr d_cex tpg_p8a fac_old tipo p_ent_l eda_ante
	ageb *survey* *seg_soc* *rama* *48m1sm ;
#delimit cr
compress
save "`savedata'eneo_clean05_12_rev", replace


* delete the intermediate per-quarter merge files
* (forvalues replaces the obsolete "for num ...:" command; same files, same order)
foreach Y in 05 06 07 08 09 10 11 12 {
	forvalues Q = 1/4 {
		erase "`savedata'merge`Q'`Y'_d.dta"
	}
}


**# ---------------------------APPEND --------------------------------------------------------------
* Adjustments to the 2005-2012 data
use "`savedata'eneo_clean05_12_rev", clear

* sequence number of each record within person key and interview number (diagnostic only)
bysort cd_a ent con v_sel n_hog h_mud n_ren est l_nac_c nac_dia nac_mes nac_anio sex n_ent (qt year): generate n2 = _n
tabulate n2
drop n2 att

* flag set to 1 on every record of this file
generate eneo = 1

* birth-date components coded 99 (day, month) or 9999 (year) become missing,
* and records without a complete birth date are removed
recode nac_dia nac_mes (99 = .)
recode nac_anio (9999 = .)
drop if nac_dia == . | nac_mes == . | nac_anio == .
tabulate quarter

* keep interview numbers below 6; records with a missing n_ent are removed as well
drop if n_ent >= 6

sort cd_a ent con v_sel n_hog h_mud n_ren est l_nac_c nac_dia nac_mes nac_anio sex n_ent
tabulate quarter

* con2: con written as text and left-padded with zeros to four characters
* (values above 999 are copied unpadded; a missing con leaves con2 empty)
generate con2 = ""
replace con2 = string(con)         if con > 999 & con != .
replace con2 = "0" + string(con)   if con > 99 & con < 1000
replace con2 = "00" + string(con)  if con > 9 & con < 100
replace con2 = "000" + string(con) if con < 10

* ent2: ent written as text and left-padded with a zero to two characters
* (values above 9 are copied unpadded; a missing ent leaves ent2 empty)
generate ent2 = ""
replace ent2 = string(ent)       if ent > 9 & ent != .
replace ent2 = "0" + string(ent) if ent < 10

* number the survey rounds consecutively: n_enc = 1 for the earliest quarter in the data
sort quarter 
egen n_enc = group(quarter)

* n_grup = digits of n_ent followed by digits of n_enc
* (no longer needed to build grup, but kept because it is dropped by name further down)
egen n_grup=concat(n_ent n_enc)
destring  n_grup, replace

* grup is constant along n_enc - n_ent: a unit with interview number k in round t
* and k+1 in round t+1 receives the same grup.
* The former lookup table (41/52 -> 1, 31/42/53 -> 2, 21/32/43/54 -> 3,
* 11/22/33/44/55 -> 4, ...) encodes the rule grup = n_enc - n_ent + 4, but it listed
* n_grup==512 under both grup 11 and grup 12 and never listed 513, so
* (n_ent 5, n_enc 12) ended up in grup 12 instead of 11 and (n_ent 5, n_enc 13)
* was left missing. Computing the rule directly removes both errors.
* As in the table, combinations that would give grup < 1 (n_ent 5 in round 1) and
* interview numbers outside 1-5 stay missing. The table ended at about round 40;
* the rule has no upper limit.
quietly generate grup = n_enc - n_ent + 4 if inrange(n_ent, 1, 5) & n_enc - n_ent + 4 >= 1

* only households with n_hog equal to 1 are retained (missing n_hog is removed too)
drop if n_hog != 1

* household identifiers: idh is the concatenated household key, idh3 appends grup to it
egen idh = concat(cd_a ent2 con2 v_sel n_hog h_mud)
egen idh3 = concat(idh grup)

* person identifier: one number per distinct combination of the person key
egen id_ia = group(cd_a ent2 con2 v_sel n_hog h_mud n_ren est l_nac_c nac_dia nac_mes nac_anio sex)

* records per person (diagnostic only)
bysort id_ia: generate n2 = _N
tabulate n2
drop n2

* education level: niv_inst_t for 2005-2009, niv_ins for 2010-2012
generate edu = niv_inst_t if inrange(year, 2005, 2009)
replace edu = niv_ins if inrange(year, 2010, 2012)

* v_sel and c_res are stored as text from here on
tostring v_sel c_res, replace

compress
* variables no longer needed once the identifiers exist
#delimit ;
drop *l_nac* d_sem n_pro_viv n_hog h_mud n_ren ur
	nac_dia nac_anio nac_mes n_hij par_ind dur_est ing_asa dur_asa pre_asa tip_con tcco
	cp_anoc imssissste ampliado t_tra con2 ent2 n_grup grup ;
#delimit cr
compress
save "`savedata'eneo_clean05_12_temp", replace


*-----------------------
* Adjustments to the 2000-2004 data: some variables need to be uniform across data sets
use "`savedata'eneo_clean00_04_rev", clear
* flag set to 1 on every record of the 2000-2004 file
generate ene = 1

*-----------------------
* Now append the 2005-2012 data

append using "`savedata'eneo_clean05_12_temp"

* person id: id_ia (converted to text) for 2005-2012 records, id_ib for 2000-2004 records,
* then renumbered jointly as idi
* NOTE(review): if an id_ia value written as text can equal an id_ib value, both persons get the same idi - confirm
tostring id_ia, replace
generate id = cond(inrange(year, 2005, 2012), id_ia, cond(inrange(year, 2000, 2004), id_ib, ""))
egen idi = group(id)
drop id id_ib id_ia

* household id: idh3 for 2005-2012 records, idh2 for 2000-2004 records, renumbered jointly as id_hh
generate id_hha = cond(inrange(year, 2005, 2012), idh3, cond(inrange(year, 2000, 2004), idh2, ""))
egen id_hh = group(id_hha)
drop id_hha

* occupation type: o_pos_ocu before 2010, pos_ocu otherwise
generate type_ocu = cond(year < 2010, o_pos_ocu, pos_ocu)

* deflate: attach the price index by quarter and year and keep matched records only
* (old-style merge: both data sets must be sorted on qt year;
*  the cpi file is presumably saved sorted that way - TODO confirm)
sort qt year
merge qt year using "C:\Users\rgi021\Dropbox\Mexico Labor\Data\CPI Mexico\cpi"
tab _merge 
keep if _merge == 3
drop _merge

* index rescaled by 100, used as the deflator below
ge newcpi = cpi/100

rename  ing7c wagemin 
lab var wagemin "Wage Relative to Minimum Wage"
* value label 7 previously read "Not speficied" (typo)
lab define wagemin 0 "Does not Apply" 1 "up to MW" 2 "1 to 2 MW" 3 "2 to 3 MW" 4 "3 to 5 MW" 5 "More than 5x MW" 6 "No labor income" 7 "Not specified"
lab value wagemin wagemin

* hourly and monthly labor income divided by the deflator
ge hwaged =  ing_x_hrs/newcpi
ge salaryd = ingocup/newcpi

* sample: ages 15 to 80, parentes codes 1 and 2 only (head and spouse, per the definitions at the top of the file)
keep if inrange(eda, 15, 80) & inlist(parentes, 1, 2)

* married = 1 when e_con is 1 or 5, 0 for the other codes between 0 and 9 (exclusive), missing otherwise;
* the source variable e_con is then removed
ge married = inlist(e_con, 1, 5) if e_con > 0 & e_con < 9
drop e_con

* Task 2 (firm size by year)
rename emple7c firm_size_task2

* Task 4 (movement across formal and informal sectors)
/* clase1: 1 - economically active; 2 - not active

clase2:
1 - employed
2 - unemployed
3 - available
4 - not available
*/

* Situation: 1 = clase2 in 2-4, 2 = clase2 1 with insurance, 3 = clase2 1 without insurance;
* left missing when clase2 is 1 and insurance is unknown
generate situation1 = cond(inlist(clase2, 2, 3, 4), 1, ///
	cond(clase2 == 1 & insurance == 1, 2, ///
	cond(clase2 == 1 & insurance == 0, 3, .)))

* get the rollout
* records with municipality code 0 are removed
drop if mun == 0

* summarizing the first record of each ent-mun pair reports the number of distinct pairs (diagnostic only)
bysort ent mun: generate n = _n
// keep if n == 1
summarize n if n == 1
drop n

* Drops: variables not kept in the final file
#delimit ;
drop n_hij year_search mon_search date_search p_dia p_mes p_anio est_d dur_des scian sec_ins
	idh3 age hij5c dispo nodispo sub_o fac status dur_busq
	con v_sel hog n_ent c_res t_res t_hog l_viv n_enc nom cd_a par mun2 salario ;
#delimit cr

* variables in alphabetical order
order _all, alphabetic

* value labels for the situation categories
lab define codsituation 1 "Desocupado/inativo" 2 "Ocupado com Insurance" 3 "Ocupado sem insurance"
lab values situation1 codsituation

* variable labels
lab var ambito "Población ocupada- Tamano de la unidad económica subtotales"
lab var ent "Entidad"
lab var est "Estrato"
lab var firm_size_task2 "Firm_size todos os anos"
lab var head "1 if Household head"
lab var head_age "Idade do HH"
lab var head_male "1 if Male HH"
lab var hwaged "Hourly wage - corrigido"
lab var idh "Id do domicilio"
lab var insurance "Acceso a instituciones de salud"
lab var loc "Localidad"
lab var lwage "ln(hourly wage) sem correcao"
lab var married "1 if married"
lab var med_assis "Acceso a atención médica - ENOE apenas"
*lab var medica5c "Población ocupada: Prestaciones de salud"
lab var mother "1 if mother"
lab var mun "Municipio"
lab var newcpi "Inflation index / 100"
*lab var o_pos_ocu "Posicao ocupacao ENE"
*lab var o_rama "Sector de actividad económica - ENOE"
lab var occupation "Condición de ocupación"
lab var p3r_anio "En qué ano entro a trabajar por primera vez para su actual empresa"
lab var p3r_mes "En qué mes entro a trabajar por primera vez para su actual empresa"
lab var parentes "Parentes"
lab var per "Periodo"
lab var pos_ocu "Posicao ocupacao ENOE"
lab var qt "Quarter"
lab var quarter "Quarter"
lab var r_def "Resultado definitivo de la entrevista"
lab var s_pos_ocu "Posicao ocupacao ENE"
lab var salaryd "Monthly wage - corrigido"
lab var sector "Sector de actividad económica 4g"
lab var sector_detailed "Sector de actividad económica 11g"
*lab var seg_soc "Población ocupada por condición de acceso a instituciones de salud"
lab var sex "Sexo"
lab var situation1 "Situacao "
lab var socsec "Access to social security through job"
lab var spouse "Spouse"
lab var t_loc "Tamano de localidad"
lab var type_ocu "Posicao ocupacao final"
lab var work "Realizó una actividad que le proporcionó ingresos?"

* final data set, 2000-2012
compress
save "`savedata'eneo_clean00_12_rev", replace
  